[1] 38 [1] TRUE [1] 454 38
ATP DEX IFNy IL4 LPS R848 TNFa unstim
5 10 79 8 128 42 47 135
ununstim 42
# The variable to be tested must be a fixed effect
names(metadata_cultured) = tolower(names(metadata_cultured))
form <- ~ age + (1|donor_id) + stimulation + picard_pct_ribosomal_bases + region + picard_pct_mrna_bases + age + picard_pct_percent_duplication + picard_pct_pf_reads_aligned
# estimate weights using linear mixed model of dream
#vobjDream = voomWithDreamWeights( genes_counts_cultured, form, metadata_cultured )
# Fit the dream model on each gene
#fit = (dream( vobjDream, form, metadata_cultured ))
#res_age <- data.frame(topTable(fit, coef='age', number=nrow(genes_counts_cultured), sort.by = "p"), check.names = F)
#male age as coefficient
#save(res_age, file ="res_age.Rdata")load("/Users/gijsjesnijders/Documents/MiGASti/Databases/res_age.Rdata")
load("~/Downloads/genes_counts_cultured.Rdata")
gencode_30 = read.table("~/Documents/MiGASti/Databases/ens.geneid.gencode.v30")
colnames(gencode_30) = c("ensembl","symbol")
res_age <- tibble::rownames_to_column(res_age, "ensembl")
res_name_age <- merge(res_age, gencode_30, by = "ensembl")
sign_age <- subset(res_age, adj.P.Val < 0.15)
length(rownames(sign_age))## [1] 1348
sign_age <- subset(res_name_age, adj.P.Val < 0.10)
length(rownames(sign_age))## [1] 789
sign_age <- subset(res_name_age, adj.P.Val < 0.05)
length(rownames(sign_age))## [1] 311
res = res_name_age
p = ggplot(res, aes(P.Value))
p + geom_density(color="darkblue", fill="lightblue") +
theme_classic() +
ggtitle("FDR Distribution")p = ggplot(res, aes(logFC))
p + geom_density(color = "darkblue", fill = "lightblue") +
theme_classic() +
ggtitle("Fold Change Distribution")plot.data = res
plot.data$id = rownames(plot.data)
data = data.frame(plot.data)
data$P.Value = -log10(data$P.Value)
data$fifteen = as.factor(abs(data$adj.P.Val < 0.05))
ma = ggplot(data, aes(AveExpr, logFC, color = fifteen))
ma + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c ("> 0.05", "< 0.05")) +
labs(title = "MA plot", color = "labels") +
theme_classic()#theme(plot.title = element_text(hjust = 0.5)) + ylim (-10,10) + xlim(-4,22)vp = ggplot(data, aes(logFC, P.Value, color = fifteen))
vp + geom_point() +
scale_color_manual(values = c("black", "red"), labels = c("> 0.05", "< 0.05")) +
labs(title = "Gene Level Volcano Plot", color = "FDR") +
#theme(plot.title = element_text(hjust = 0.5)) +
theme_classic() +
xlim(-1,1) + ylim(0, 20) + ylab("-log10 pvalue")setwd("~/Documents/MiGA/Revision")
MIGA = "~/Documents/MiGA/Revision/AgeMIGA.xlsx"
MIGA = read_excel(MIGA, col_names = TRUE)
MIGA = as.data.frame(MIGA)
overlap <- merge(sign_age, MIGA, by = "symbol")
list(overlap$symbol)## [[1]]
## [1] "A1BG" "A1BG-AS1" "AC022568.1" "AC129507.1" "AGAP2-AS1"
## [6] "AIM2" "AL445524.1" "ANXA1" "AP3M2" "APBA2"
## [11] "ATPAF1" "BBOF1" "BX640514.2" "C1orf162" "C22orf34"
## [16] "CBY1" "CD163" "CDYL2" "CHCHD6" "CLEC2B"
## [21] "CLEC4D" "CLIC2" "CMAHP" "CPNE8" "CST7"
## [26] "CSTA" "DDO" "DOCK5" "EFEMP2" "EMP2"
## [31] "FSTL5" "GGT1" "GOLGA6L7" "HOTAIRM1" "IL15"
## [36] "IL2RG" "LINC00996" "LINC02432" "LSP1" "LTA4H"
## [41] "MAP1LC3A" "MCTP1" "MOV10L1" "MS4A6A" "MT2A"
## [46] "NMRAL2P" "NR3C1" "NUDT16P1" "NUPR1" "NXF3"
## [51] "PAQR5" "PBX3" "PFKP" "PPP1R26P1" "PSTPIP1"
## [56] "PTPN7" "S100A4" "SELL" "SH3BP5" "SLC29A3"
## [61] "SMIM14" "SMIM3" "SPATS2L" "TSTD1" "VNN2"
## [66] "VSIG4" "ZFPM2-AS1" "ZNF24" "ZNF287" "ZNF300P1"
## [71] "ZNF454" "ZNF804A"
set1 <- sign_age
set2 <- MIGA
set1_v <- set1$symbol
set2_v <- set2$symbol
setEnrichment <- function(set1, set2, universe = 20000){
a = sum(set1 %in% set2)
c = length(set1) - a
b = length(set2) - a
d = universe - length(set2) - c
contingency_table = matrix(c(a, c, b, d), nrow = 2)
# one-tailed test for enrichment only
fisher_results = fisher.test(contingency_table, alternative = "greater")
# returns data.frame containing the lengths of the two sets, the overlap, the enrichment ratio (odds ratio) and P value
df <- tibble::tibble( set1_length = length(set1), set2_length = length(set2), overlap = a, ratio = fisher_results$estimate, p.value = fisher_results$p.value)
return(df)
}
setEnrichment( set1 = set1_v, set2 = set2_v)#note. pvalues based on wilcox test of log2(tpm)+1 without additional correction(not based on pvalue DEGs with DREAM)
metadata <- read.table("~/Documents/MiGASti/Databases/metadata.txt", quote="\"", comment.char="")
#set rownames to Sample
row.names(metadata) <- metadata$Sample
#exclude samples that did not pass QC
exclude <- read.csv("~/Documents/MiGASti/Databases/samples2remove.txt", sep="")
exclude <- exclude$x
genes_tpm_filt = genes_tpm[, !colnames(genes_tpm) %in% exclude]
#Excludes the samples from filters.
#dim(genes_tpm_filt)
metadata_filt = metadata[ !(rownames(metadata) %in% exclude), ]
length(metadata_filt)## [1] 38
genes_tpm_filt = log2((genes_tpm_filt) + 1)
genes_tpm_filt <- as.data.frame(genes_tpm_filt)
genes_tpm_ordered <- genes_tpm_filt[,rownames(metadata_filt)]
#head(genes_tpm_ordered)
all(rownames(metadata_filt) == colnames (genes_tpm_ordered)) #TRUE## [1] TRUE
samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "LPS"] # Includes same donor
metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]
genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)
deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])
gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")
names(metadata4deg) = tolower(names(metadata4deg))
gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")
ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
geom_boxplot(notch = F, na.rm = T) +
geom_jitter() +
theme_bw() + facet_wrap(~symbol, scales = "free_y") +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "IFNy"] # Includes same donor
metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]
genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)
deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])
gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")
names(metadata4deg) = tolower(names(metadata4deg))
gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")
ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
geom_boxplot(notch = F, na.rm = T) +
geom_jitter() +
theme_bw() + facet_wrap(~symbol, scales = "free_y") +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "R848"] # Includes same donor
metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]
genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)
deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])
gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")
names(metadata4deg) = tolower(names(metadata4deg))
gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")
ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
geom_boxplot(notch = F, na.rm = T) +
geom_jitter() +
theme_bw() + facet_wrap(~symbol, scales = "free_y") +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "TNFa"] # Includes same donor
metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]
genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)
deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])
gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")
names(metadata4deg) = tolower(names(metadata4deg))
gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")
ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
geom_boxplot(notch = F, na.rm = T) +
geom_jitter() +
theme_bw() + facet_wrap(~symbol, scales = "free_y") +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "DEX"] # Includes same donor
metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]
genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)
deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])
gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")
names(metadata4deg) = tolower(names(metadata4deg))
gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")
ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
geom_boxplot(notch = F, na.rm = T) +
geom_jitter() +
theme_bw() + facet_wrap(~symbol, scales = "free_y") +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "IL4"] # Includes same donor
metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]
genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)
deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])
gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")
names(metadata4deg) = tolower(names(metadata4deg))
gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")
ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
geom_boxplot(notch = F, na.rm = T) +
geom_jitter() +
theme_bw() + facet_wrap(~symbol, scales = "free_y") +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")samples_baseline = metadata_filt$Sample[metadata_filt$Stimulation %in% "unstim"] # Includes same donor
samples_condition = metadata_filt$Sample[metadata_filt$Stimulation %in% "ATP"] # Includes same donor
metadata4deg = metadata_filt[rownames(metadata_filt) %in% c(samples_baseline,samples_condition) ,]
genes_voom_baseline_condition = as.data.frame(genes_tpm_ordered[, colnames(genes_tpm_ordered) %in% c(samples_baseline, samples_condition)])
# dim(genes_voom_baseline_condition)
# dim(metadata4deg)
deg_lists = res_name_age[order(res_name_age$adj.P.Val) ,]
top_6 = head(deg_lists)
data.table(top_6)gene2check = as.data.frame(genes_voom_baseline_condition[top_6$ensembl, ])
gene2check$ensembl = rownames(gene2check)
gene2check = merge(gene2check, top_6[, c("symbol", "ensembl")], by = "ensembl")
names(metadata4deg) = tolower(names(metadata4deg))
gene2check_m = melt(gene2check, id.vars = c("ensembl", "symbol"))
gene2check_charac = merge(gene2check_m, metadata4deg, by.x = "variable", by.y = "sample")
ggplot(gene2check_charac, aes(x = stimulation, y = value, fill = stimulation)) +
geom_boxplot(notch = F, na.rm = T) +
geom_jitter() +
theme_bw() + facet_wrap(~symbol, scales = "free_y") +
ggpubr::stat_compare_means(label = "p.format", label.x.npc = "centre", method = "wilcox.test")res_age_diff_top = res_name_age[, c("ensembl", "symbol", "logFC", "AveExpr", "t", "P.Value", "adj.P.Val", "z.std")]
createDT(res_age_diff_top)## Warning in instance$preRenderHook(instance): It seems your data is too big
## for client-side DataTables. You may consider server-side processing: https://
## rstudio.github.io/DT/server.html